Load data

#> Info: preparing to read 1 data file(s)...
#> Info: reading file 1/1 'GB_SH1_lowres.cf.rda' with '.rda' reader...
#> Info: loaded data for 59 data files from R Data Archive - checking loaded files for content consistency...
#> Info: encountered 4 problems in total.
#> # A tibble: 4 x 4
#>   file_id                 type  func           details                    
#>   <chr>                   <chr> <chr>          <chr>                      
#> 1 504__A5__1_5UL_30ng.dxf error extract_dxf_r… cannot identify measured m…
#> 2 504__A5__1_5UL_30ng.dxf error extract_isoda… object 'mass' not found    
#> 3 503____.dxf             error extract_dxf_r… cannot identify measured m…
#> 4 503____.dxf             error extract_isoda… object 'mass' not found

Vendor data table

#> Info: aggregating vendor data table without units from 59 data file(s), including file info 'c(ox = `Seed Oxidation`, everything())'

Map peaks

Chromatograms

Retrieve metadata

Map peaks

# add metadata
data_w_metadata <- vdt %>% 
  iso_add_metadata(metadata_samples, match_by = c(`file_id`)) 
#> Info: metadata added to 2007 data rows, 0 left without metadata:
#> - 57 metadata entries were mapped to 2007 data entries via column 'file_id'

# missing metadata
data_w_metadata %>% 
  iso_get_missing_metadata(select = c(Analysis, `file_id`, map_id))
#> Info: fetching data entries that are missing metadata

# map peaks
data_w_peaks_all <- data_w_metadata %>% 
  # only continue with records that have metadata and are flagged for processing
  iso_remove_missing_metadata() %>% 
  filter(process == "yes") %>% 
  # map peaks
  iso_map_peaks(metadata_peak_maps, file_id = file_id, rt = Rt, rt_start = Start, rt_end = End)
#> Info: removing 0 of 2007 data entries because of missing metadata
#> 

# problematic peaks
prob <- data_w_peaks_all %>% 
  iso_get_problematic_peaks(select = c(file_id, Analysis, compound, Start, Rt, End))
#> Info: fetching 1015 of 2225 data table entries with problematic peak identifications (unidentified, missing or ambiguous)

# focus on non problematic peaks only
data_w_peaks <- data_w_peaks_all %>% iso_remove_problematic_peaks()
#> Info: removing 1015 of 2225 data table entries because of problematic peak identifications (unidentified, missing or ambiguous)
data_w_peaks

Reference peaks

data_w_peaks %>% 
  iso_plot_ref_peaks(
    is_ref_condition = is_ref_peak == "yes", 
    ratio = c(`R 13C/12C`, `R 18O/16O`),
    group_id = `Analysis`,
    within_group = TRUE
  ) 

Data processing

Focus on analytes

Focus on the analytes and calculate a few summary parameters we want to use later.

Evaluate Standards

Known isotope values

Add known isotope values for standards.

#> Info: matching standards by 'type' and 'compound' - added 15 standard entries to 285 out of 697 rows

Single analysis calibration (for QA)

Determine calibrations fits for individual standard analyses.

#> Info: preparing data for calibration by grouping based on 'Analysis'
#> Info: generating 'd13C' calibration based on 1 model ('lm(`d 13C/12C` ~ true_d13C)') for 19 data group(s) with standards filter 'is_standard'. Storing residuals in new column 'd13C_resid'.
#> Info: there were no problematic calibrations
#> Warning in `[<-.data.frame`(`*tmp*`, is_list, value = structure(list(`2` =
#> "<>", : replacement element 1 has 1 row to replace 0 rows
#> Warning in `[<-.data.frame`(`*tmp*`, is_list, value = structure(list(`2` =
#> "<>", : replacement element 2 has 1 row to replace 0 rows

Calibration coefficients

Look at calibration parameters (coefficients and summary) as well as data ranges in overview table.

Visualize the calibration parameters

# unnest some of the data for plotting
data_for_plots <- 
  stds_w_calibs %>%
  iso_unnest_data(
    c(Oxidation = `Seed Oxidation`, 
      datetime = file_datetime,
      `Mean Amplitude` = ampl_sample_mean.mV)
  ) 
data_for_plots


# parameters vs time
data_for_plots %>%
  iso_plot_calibration_parameters(
    calibration = "d13C", 
    x = datetime,
    color = `Mean Amplitude`,
    shape = Oxidation
  )


# parameter vs analysis
data_for_plots %>% 
  iso_plot_calibration_parameters(
    calibration = "d13C", 
    x = paste(Analysis),
    color = `Mean Amplitude`,
    shape = Oxidation
  )


# parameters vs amplitude
data_for_plots %>% 
  iso_plot_calibration_parameters(
    calibration = "d13C", 
    x = `Mean Amplitude`
  ) 

# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))

Inspect Residuals

Look at residuals to see goodness of fit for the single analysis calibrations.

stds_w_calibs %>% 
  # pull out all data
  iso_unnest_data(select = everything()) %>% 
  filter(is_standard) %>% 
  # calculate deviation from means
  group_by(file_id) %>% 
  mutate(
    `Var: residual d13C [permil]` = d13C_resid,
    `Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
    `Var: amplitude diff from mean [%]` = (`Ampl 44`/mean(`Ampl 44`) - 1) * 100
  ) %>%
  ungroup() %>% 
  # visualize
  iso_plot_data(
    x = compound,
    y = starts_with("Var"),
    group = file_id
  ) +
  # plot modifications
  labs(x = "")

ggplotly()

Gobal calibration

Test different calibration models

Run global calibrations across all standards.

#> Info: preparing data for calibration by nesting the entire dataset
#> Info: generating 'd13C' calibration based on 4 models ('delta_only = lm(`d 13C/12C` ~ true_d13C)', 'delta_and_ampl = lm(`d 13C/12C` ~ true_d13C + `Ampl 44`)', 'delta_cross_ampl = lm(`d 13C/12C` ~ true_d13C * `Ampl 44`)', 'delta_and_time = lm(`d 13C/12C` ~ true_d13C + file_datetime)') for 1 data group(s) with standards filter 'type == "standard" & is_standard'. Storing residuals in new column 'd13C_resid'.

Visualize the calibration parameters for the different models.

# coefficient summary table
data_w_calibs %>% iso_unnest_calibration_parameters("d13C")

# plot to look at coefficients and summary
data_w_calibs %>% 
  iso_plot_calibration_parameters(
    calibration = "d13C",
    x = d13C_calib,
    color = d13C_calib,
    # include the significance level to gauge which model parameters matter
    shape = signif
  )

ggplotly()

It looks like the more complex calibration models don’t really add any value here. Check out the residuals to double check. It’s clear from the residuals that the models all perform similarly well across all the standards. Because the simplest model that makes physical sense and captures the variation typically is the best to use, we’re going with the delta_only model in this case. #Garrett thinks delta only looks best here

data_w_calibs %>% 
  iso_unnest_data(select = everything()) %>% 
  rename(`Injection Volume` = `Identifier 2`) %>% 
  filter(type == "standard" & is_standard) %>% 
  # plot each standard analysis for each of the models to see the differences
  iso_plot_data(
    x = compound,
    y = d13C_resid,
    group = paste(Analysis, d13C_calib),
    color = d13C_calib
  ) 

Apply calibration

Apply the delta_only calibration.

#> Info: applying 'd13C' calibration to infer 'true_d13C' for 1 data group(s); storing resulting value in new column 'true_d13C_pred' and estimated error in new column 'true_d13C_pred_se'. This may take a moment... finished.

Inspect results

The comparison of the predicted value in the standards along with the standard error in the prediction (the inherent error of the global calibration itself, which should be treated as the analytical error) can give a better sense of how precise the data is.

Visualization

Many different ways exist to visualize the data. Regardless of the plotting method it is helpful to keep in mind that the calibration provides not just the predicted value of a peak but also the standard error based on the calibration and an indicator whether a predicted data point was in the calibration range or not.

data_calibrated %>% 
  iso_plot_data(
    x = Analysis, 
    y = true_d13C_pred, 
    y_error = true_d13C_pred_se,
    color = true_d13C_pred_in_range,
    shape = type,
    points = TRUE,
    lines = FALSE
  ) + 
  # additional features beyond the default plot
  facet_wrap(~compound, scales = "free_y") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(color = "in calib. range")

ggplotly()

Plots with Data

all relevant

data_calibrated %>% 
  filter(compound %in% c("pristane", "phytane", "Sterane", "C15", "C19", "C21", "C23",  "C27", "C29", "C31", "C33"))%>%
  #, true_d13C_pred_in_range == TRUE
  iso_plot_data(
    x = Depth, 
    y = true_d13C_pred, 
    y_error = true_d13C_pred_se,
    color = true_d13C_pred_in_range
  ) + 
  # additional features beyond the default plot
  facet_wrap(~compound, scales = "free_y") + 
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(color = "in calib. range") +
  scale_x_reverse() +
  coord_flip()
#> Warning: Removed 95 rows containing missing values (geom_errorbar).
#> Warning: Removed 95 rows containing missing values (geom_point).

export to combine with other data for final plots (see 20180619_GB_SH1_adducts.Rmd)

final_lowres <- data_calibrated %>% filter(Analysis != 493) %>% filter(Analysis != 495) %>%
  select(
      # sample info
      `file_id`, `compound`, `Identifier 1`, depth = `Depth`,  `measurement_info`, `ampl_sample_mean.mV`, `file_datetime`, `type` , 
      # peak info
       `Ampl 44`, true_d13C,  true_d13C_pred , true_d13C_pred_se, true_d13C_pred_in_range,  d13C_resid, true_d13C_pred_in_range
    ) %>% apply(.,2,as.character)
write.csv(final_lowres, file = "lowres_d13c.csv")